library(tidyverse)

Reading your data

# Read the movies CSV into `movies`; the path is relative to the working
# directory.
movies <- read_csv(file = "Data_Transformations/movies_clean.csv")
# View(movies)
# Print summary statistics for every column.
summary(object = movies)
##      budget            homepage               id           keywords        
##  Min.   :        0   Length:4775        Min.   :     5   Length:4775       
##  1st Qu.:   950000   Class :character   1st Qu.:  8998   Class :character  
##  Median : 15000000   Mode  :character   Median : 14536   Mode  :character  
##  Mean   : 29214581                      Mean   : 55988                     
##  3rd Qu.: 40000000                      3rd Qu.: 57206                     
##  Max.   :380000000                      Max.   :459488                     
##                                                                            
##  original_language  original_title       overview           popularity      
##  Length:4775        Length:4775        Length:4775        Min.   :  0.0004  
##  Class :character   Class :character   Class :character   1st Qu.:  4.8066  
##  Mode  :character   Mode  :character   Mode  :character   Median : 13.1191  
##                                                           Mean   : 21.6173  
##                                                           3rd Qu.: 28.4991  
##                                                           Max.   :875.5813  
##                                                                             
##  production_companies production_countries  release_date       
##  Length:4775          Length:4775          Min.   :1916-09-04  
##  Class :character     Class :character     1st Qu.:1999-06-29  
##  Mode  :character     Mode  :character     Median :2005-09-23  
##                                            Mean   :2002-12-14  
##                                            3rd Qu.:2011-02-07  
##                                            Max.   :2017-02-03  
##                                                                
##     revenue             runtime      spoken_languages      status         
##  Min.   :0.000e+00   Min.   :  0.0   Length:4775        Length:4775       
##  1st Qu.:0.000e+00   1st Qu.: 94.0   Class :character   Class :character  
##  Median :1.947e+07   Median :104.0   Mode  :character   Mode  :character  
##  Mean   :8.274e+07   Mean   :107.2                                        
##  3rd Qu.:9.357e+07   3rd Qu.:118.0                                        
##  Max.   :2.788e+09   Max.   :338.0                                        
##                      NA's   :2                                            
##    tagline             title            vote_average      vote_count     
##  Length:4775        Length:4775        Min.   : 0.000   Min.   :    0.0  
##  Class :character   Class :character   1st Qu.: 5.600   1st Qu.:   55.0  
##  Mode  :character   Mode  :character   Median : 6.200   Median :  238.0  
##                                        Mean   : 6.114   Mean   :  694.3  
##                                        3rd Qu.: 6.800   3rd Qu.:  742.0  
##                                        Max.   :10.000   Max.   :13752.0  
##                                                                          
##   release_year  genre_Action    genre_Adventure genre_Fantasy   genre_Science  
##  Min.   :1916   Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  1st Qu.:1999   FALSE:3621      FALSE:3985      FALSE:4351      FALSE:4240     
##  Median :2005   TRUE :1154      TRUE :790       TRUE :424       TRUE :535      
##  Mean   :2002                                                                  
##  3rd Qu.:2011                                                                  
##  Max.   :2017                                                                  
##                                                                                
##  genre_Crime     genre_Drama     genre_Thriller  genre_Animation
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:4079      FALSE:2478      FALSE:3501      FALSE:4541     
##  TRUE :696       TRUE :2297      TRUE :1274      TRUE :234      
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##  genre_Family    genre_Western   genre_Comedy    genre_Romance  
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:4262      FALSE:4693      FALSE:3053      FALSE:3881     
##  TRUE :513       TRUE :82        TRUE :1722      TRUE :894      
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##  genre_Horror    genre_Mystery   genre_History   genre_War      
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:4256      FALSE:4427      FALSE:4578      FALSE:4631     
##  TRUE :519       TRUE :348       TRUE :197       TRUE :144      
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##  genre_Music     genre_Documentary genre_Foreign    genre_TV      
##  Mode :logical   Mode :logical     Mode :logical   Mode :logical  
##  FALSE:4590      FALSE:4665        FALSE:4741      FALSE:4767     
##  TRUE :185       TRUE :110         TRUE :34        TRUE :8        
##                                                                   
##                                                                   
##                                                                   
## 
# Show the internal structure of `movies`: one line per column with its type.
str(object = movies)
## spec_tbl_df [4,775 × 40] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ budget              : num [1:4775] 2.37e+08 3.00e+08 2.45e+08 2.50e+08 2.60e+08 2.58e+08 2.60e+08 2.80e+08 2.50e+08 2.50e+08 ...
##  $ homepage            : chr [1:4775] "http://www.avatarmovie.com/" "http://disney.go.com/disneypictures/pirates/" "http://www.sonypictures.com/movies/spectre/" "http://www.thedarkknightrises.com/" ...
##  $ id                  : num [1:4775] 19995 285 206647 49026 49529 ...
##  $ keywords            : chr [1:4775] "[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \"name\": \""| __truncated__ "[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"name\": \"drug abuse\"}, {\"id\": 911, \"name\": \"exotic "| __truncated__ "[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name\": \"based on novel\"}, {\"id\": 4289, \"name\": \"secr"| __truncated__ "[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853, \"name\": \"crime fighter\"}, {\"id\": 949, \"name\": \""| __truncated__ ...
##  $ original_language   : chr [1:4775] "en" "en" "en" "en" ...
##  $ original_title      : chr [1:4775] "Avatar" "Pirates of the Caribbean: At World's End" "Spectre" "The Dark Knight Rises" ...
##  $ overview            : chr [1:4775] "In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes tor"| __truncated__ "Captain Barbossa, long believed to be dead, has come back to life and is headed to the edge of the Earth with W"| __truncated__ "A cryptic message from Bond’s past sends him on a trail to uncover a sinister organization. While M battles pol"| __truncated__ "Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protec"| __truncated__ ...
##  $ popularity          : num [1:4775] 150.4 139.1 107.4 112.3 43.9 ...
##  $ production_companies: chr [1:4775] "[{\"name\": \"Ingenious Film Partners\", \"id\": 289}, {\"name\": \"Twentieth Century Fox Film Corporation\", \"| __truncated__ "[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"name\": \"Jerry Bruckheimer Films\", \"id\": 130}, {\"name"| __truncated__ "[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"name\": \"Danjaq\", \"id\": 10761}, {\"name\": \"B24\", \"id\": 69434}]" "[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"name\": \"Warner Bros.\", \"id\": 6194}, {\"name\": \"DC E"| __truncated__ ...
##  $ production_countries: chr [1:4775] "[{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}, {\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"}]" "[{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}]" "[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"}, {\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}]" "[{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}]" ...
##  $ release_date        : Date[1:4775], format: "2009-12-10" "2007-05-19" ...
##  $ revenue             : num [1:4775] 2.79e+09 9.61e+08 8.81e+08 1.08e+09 2.84e+08 ...
##  $ runtime             : num [1:4775] 162 169 148 165 132 139 100 141 153 151 ...
##  $ spoken_languages    : chr [1:4775] "[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]" "[{\"iso_639_1\": \"en\", \"name\": \"English\"}]" "[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"}, {\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso_"| __truncated__ "[{\"iso_639_1\": \"en\", \"name\": \"English\"}]" ...
##  $ status              : chr [1:4775] "Released" "Released" "Released" "Released" ...
##  $ tagline             : chr [1:4775] "Enter the World of Pandora." "At the end of the world, the adventure begins." "A Plan No One Escapes" "The Legend Ends" ...
##  $ title               : chr [1:4775] "Avatar" "Pirates of the Caribbean: At World's End" "Spectre" "The Dark Knight Rises" ...
##  $ vote_average        : num [1:4775] 7.2 6.9 6.3 7.6 6.1 5.9 7.4 7.3 7.4 5.7 ...
##  $ vote_count          : num [1:4775] 11800 4500 4466 9106 2124 ...
##  $ release_year        : num [1:4775] 2009 2007 2015 2012 2012 ...
##  $ genre_Action        : logi [1:4775] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ genre_Adventure     : logi [1:4775] TRUE TRUE TRUE FALSE TRUE TRUE ...
##  $ genre_Fantasy       : logi [1:4775] TRUE TRUE FALSE FALSE FALSE TRUE ...
##  $ genre_Science       : logi [1:4775] TRUE FALSE FALSE FALSE TRUE FALSE ...
##  $ genre_Crime         : logi [1:4775] FALSE FALSE TRUE TRUE FALSE FALSE ...
##  $ genre_Drama         : logi [1:4775] FALSE FALSE FALSE TRUE FALSE FALSE ...
##  $ genre_Thriller      : logi [1:4775] FALSE FALSE FALSE TRUE FALSE FALSE ...
##  $ genre_Animation     : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Family        : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Western       : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Comedy        : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Romance       : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Horror        : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Mystery       : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_History       : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_War           : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Music         : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Documentary   : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_Foreign       : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ genre_TV            : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   budget = col_double(),
##   ..   homepage = col_character(),
##   ..   id = col_double(),
##   ..   keywords = col_character(),
##   ..   original_language = col_character(),
##   ..   original_title = col_character(),
##   ..   overview = col_character(),
##   ..   popularity = col_double(),
##   ..   production_companies = col_character(),
##   ..   production_countries = col_character(),
##   ..   release_date = col_date(format = ""),
##   ..   revenue = col_double(),
##   ..   runtime = col_double(),
##   ..   spoken_languages = col_character(),
##   ..   status = col_character(),
##   ..   tagline = col_character(),
##   ..   title = col_character(),
##   ..   vote_average = col_double(),
##   ..   vote_count = col_double(),
##   ..   release_year = col_double(),
##   ..   genre_Action = col_logical(),
##   ..   genre_Adventure = col_logical(),
##   ..   genre_Fantasy = col_logical(),
##   ..   genre_Science = col_logical(),
##   ..   genre_Crime = col_logical(),
##   ..   genre_Drama = col_logical(),
##   ..   genre_Thriller = col_logical(),
##   ..   genre_Animation = col_logical(),
##   ..   genre_Family = col_logical(),
##   ..   genre_Western = col_logical(),
##   ..   genre_Comedy = col_logical(),
##   ..   genre_Romance = col_logical(),
##   ..   genre_Horror = col_logical(),
##   ..   genre_Mystery = col_logical(),
##   ..   genre_History = col_logical(),
##   ..   genre_War = col_logical(),
##   ..   genre_Music = col_logical(),
##   ..   genre_Documentary = col_logical(),
##   ..   genre_Foreign = col_logical(),
##   ..   genre_TV = col_logical()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Compact preview: columns run down the page, their first values run across.
dplyr::glimpse(movies)
## Rows: 4,775
## Columns: 40
## $ budget               <dbl> 2.37e+08, 3.00e+08, 2.45e+08, 2.50e+08, 2.60e+08,…
## $ homepage             <chr> "http://www.avatarmovie.com/", "http://disney.go.…
## $ id                   <dbl> 19995, 285, 206647, 49026, 49529, 559, 38757, 998…
## $ keywords             <chr> "[{\"id\": 1463, \"name\": \"culture clash\"}, {\…
## $ original_language    <chr> "en", "en", "en", "en", "en", "en", "en", "en", "…
## $ original_title       <chr> "Avatar", "Pirates of the Caribbean: At World's E…
## $ overview             <chr> "In the 22nd century, a paraplegic Marine is disp…
## $ popularity           <dbl> 150.43758, 139.08262, 107.37679, 112.31295, 43.92…
## $ production_companies <chr> "[{\"name\": \"Ingenious Film Partners\", \"id\":…
## $ production_countries <chr> "[{\"iso_3166_1\": \"US\", \"name\": \"United Sta…
## $ release_date         <date> 2009-12-10, 2007-05-19, 2015-10-26, 2012-07-16, …
## $ revenue              <dbl> 2787965087, 961000000, 880674609, 1084939099, 284…
## $ runtime              <dbl> 162, 169, 148, 165, 132, 139, 100, 141, 153, 151,…
## $ spoken_languages     <chr> "[{\"iso_639_1\": \"en\", \"name\": \"English\"},…
## $ status               <chr> "Released", "Released", "Released", "Released", "…
## $ tagline              <chr> "Enter the World of Pandora.", "At the end of the…
## $ title                <chr> "Avatar", "Pirates of the Caribbean: At World's E…
## $ vote_average         <dbl> 7.2, 6.9, 6.3, 7.6, 6.1, 5.9, 7.4, 7.3, 7.4, 5.7,…
## $ vote_count           <dbl> 11800, 4500, 4466, 9106, 2124, 3576, 3330, 6767, …
## $ release_year         <dbl> 2009, 2007, 2015, 2012, 2012, 2007, 2010, 2015, 2…
## $ genre_Action         <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, …
## $ genre_Adventure      <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE,…
## $ genre_Fantasy        <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FAL…
## $ genre_Science        <lgl> TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TR…
## $ genre_Crime          <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FA…
## $ genre_Drama          <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, F…
## $ genre_Thriller       <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, F…
## $ genre_Animation      <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, F…
## $ genre_Family         <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, F…
## $ genre_Western        <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Comedy         <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Romance        <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Horror         <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Mystery        <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_History        <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_War            <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Music          <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Documentary    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Foreign        <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_TV             <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …

Select

# Keep only the named columns, in the order listed.
select(.data = movies, id, original_title, budget, popularity)
# Keep every column except `keywords`.
select(.data = movies, -keywords)
# Keep a run of adjacent columns with `first:last`.
select(.data = movies, id, original_title, genre_Action:genre_TV)
# Keep columns by name prefix using a selection helper.
select(.data = movies, id, original_title, starts_with("genre"))

Creating the movies_sel dataset

  • id
  • contains “title”
  • original_language
  • budget
  • contains “vote”
  • starts with “release”
  • is a double
  • starts with “genre”
# Build the working dataset used by the examples that follow.
# A column matched by more than one selector appears only once, at the
# position of the first selector that matched it.
movies_sel <- select(
  .data = movies,
  id,
  contains("title"),
  # NOTE(review): this also matches original_title, which contains("title")
  # has already picked up; only original_language is newly added here.
  contains("original"),
  budget,
  contains("vote"),
  starts_with("release"),
  where(is.double),
  starts_with("genre")
)

filter

# Preview the selected columns before filtering on them.
dplyr::glimpse(movies_sel)
## Rows: 4,775
## Columns: 32
## $ id                <dbl> 19995, 285, 206647, 49026, 49529, 559, 38757, 99861,…
## $ original_title    <chr> "Avatar", "Pirates of the Caribbean: At World's End"…
## $ title             <chr> "Avatar", "Pirates of the Caribbean: At World's End"…
## $ original_language <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en"…
## $ budget            <dbl> 2.37e+08, 3.00e+08, 2.45e+08, 2.50e+08, 2.60e+08, 2.…
## $ vote_average      <dbl> 7.2, 6.9, 6.3, 7.6, 6.1, 5.9, 7.4, 7.3, 7.4, 5.7, 5.…
## $ vote_count        <dbl> 11800, 4500, 4466, 9106, 2124, 3576, 3330, 6767, 529…
## $ release_date      <date> 2009-12-10, 2007-05-19, 2015-10-26, 2012-07-16, 201…
## $ release_year      <dbl> 2009, 2007, 2015, 2012, 2012, 2007, 2010, 2015, 2009…
## $ popularity        <dbl> 150.43758, 139.08262, 107.37679, 112.31295, 43.92699…
## $ revenue           <dbl> 2787965087, 961000000, 880674609, 1084939099, 284139…
## $ runtime           <dbl> 162, 169, 148, 165, 132, 139, 100, 141, 153, 151, 15…
## $ genre_Action      <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, FAL…
## $ genre_Adventure   <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TR…
## $ genre_Fantasy     <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,…
## $ genre_Science     <lgl> TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE,…
## $ genre_Crime       <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE…
## $ genre_Drama       <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALS…
## $ genre_Thriller    <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALS…
## $ genre_Animation   <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALS…
## $ genre_Family      <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALS…
## $ genre_Western     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Comedy      <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Romance     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Horror      <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Mystery     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_History     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_War         <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Music       <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Documentary <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Foreign     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_TV          <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…

Which movies are action movies?

# genre_Action is already a logical column, so it can serve directly as the
# filter condition: rows where it is TRUE are kept.
filter(.data = movies_sel, genre_Action)

What movies have a vote average over 7.5?

# Keep rows whose vote average is strictly greater than 7.5.
filter(.data = movies_sel, vote_average > 7.5)

What action movies have an average voter rating over 7.5?

# Both conditions must hold for a row to be kept.
filter(.data = movies_sel, genre_Action & vote_average > 7.5)

How many movies have English, French, Spanish, or Italian as their original language? (“en”, “fr”, “es”, “it”)

# Keep a row when its language equals any one of the four codes.
# `|` is the element-wise OR, evaluated for every row.
filter(
  .data = movies_sel,
  original_language == "en" |
    original_language == "fr" |
    original_language == "es" |
    original_language == "it"
)

Given the numbers 1 through 100, which of those numbers are in 1 through 10?

# For each of 1..100, is it found among 1..10? Returns one logical per element
# of the left-hand side.
seq_len(100) %in% seq_len(10)
##   [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE
# %in% keeps rows whose original_language matches any code in the vector.
filter(movies_sel,
       original_language %in% c("en", "fr", "es", "it"))

# Store the codes in a named vector so the filter below stays readable.
# The codes appear to be ISO 639-1 (compare the iso_639_1 keys in
# spoken_languages), where Portuguese is "pt"; "po" is not a valid code and
# would never match any row.
# NOTE(review): "en" (English) is not a Romance language; it is kept here so
# the result still includes English-language films -- confirm the intent.
romance_languages <- c("en", "fr", "es", "it", "pt")

filter(movies_sel,
       original_language %in% romance_languages)

arrange

What movie is most expensive to produce? Which movie has the highest budget?

# Sort from largest to smallest budget; the top row is the most expensive movie.
arrange(.data = movies_sel, desc(budget))
# Sort by year ascending, then by budget descending within each year.
arrange(.data = movies_sel, release_year, desc(budget))

mutate

What movie has the highest profit?

# Add a `profit` column computed row by row as revenue minus budget.
mutate(.data = movies_sel, profit = revenue - budget)

How does the runtime for each movie compare to the average runtime for all movies?

# Add each movie's distance from the overall mean runtime and mean budget.
# na.rm = TRUE is needed because runtime contains missing values; without it
# the mean (and therefore every difference) would be NA.
mutate(
  .data = movies_sel,
  runtime_diff = runtime - mean(runtime, na.rm = TRUE),
  budget_diff = budget - mean(budget, na.rm = TRUE)
)

summarize

What is the median and mean budget for all movies?

# Collapse the whole table to a single row holding the median and mean budget.
summarize(
  .data = movies_sel,
  budget_median = median(budget, na.rm = TRUE),
  budget_mean = mean(budget, na.rm = TRUE)
)

group_by / ungroup

What was the average budget by release year?

# Mark release_year as the grouping variable; the data itself is unchanged.
movies_group <- group_by(.data = movies_sel, release_year)
# On grouped data, summarize() returns one row per release year.
summarize(
  .data = movies_group,
  budget_median = median(budget, na.rm = TRUE),
  budget_mean = mean(budget, na.rm = TRUE)
)

What movie released in 2001 had the highest budget?

# Step 1: keep only the columns needed to answer the question.
movies_selected <- select(
  .data = movies,
  id, original_title, budget, release_year
)
# Step 2: keep only movies released in 2001.
movies_filtered <- filter(.data = movies_selected, release_year == 2001)
# Step 3: sort by budget, largest first, so the answer is the top row.
arrange(.data = movies_filtered, desc(budget))

Piping %>%

# The pipe passes the left-hand result in as the first argument of the next
# call, so the steps read top to bottom without intermediate variables.
movies_sel %>%
  select(id, original_title, budget, release_year) %>%
  filter(release_year == 2001) %>%
  arrange(desc(budget))
# A single piped step: same result as select(movies_sel, id).
movies_sel %>%
  select(id)

What movie had the highest vote average overall?

  • select
  • mutate
  • filter
  • arrange
  • summarize
  • group_by / ungroup
# Option 1: sort so the highest-rated movies come first.
movies_sel %>%
  arrange(desc(vote_average))
# Option 2: keep only the rows that share the maximum vote average.
movies_sel %>%
  filter(vote_average == max(vote_average))

What movie had the highest vote average for movies released after 2010?

# Restrict to releases after 2010 (2011 onward), then rank by vote average.
movies_sel %>%
  filter(release_year > 2010) %>%
  arrange(desc(vote_average))

What movie released after 2010 had the highest vote average among movies with a vote count greater than the median?

# Both conditions sit in one filter() call, so the median is taken over every
# row of movies_sel, not just the movies released after 2010.
movies_sel %>%
  filter(
    release_year > 2010,
    vote_count > median(vote_count, na.rm = TRUE)
  ) %>%
  arrange(desc(vote_average))

What movie had the highest vote average among movies that made at least double their budget?

# Approach 1: compare revenue against twice the budget.
# Movies with a budget of 0 are dropped first: any revenue would trivially
# "double" a zero budget.
movies %>%
  filter(budget > 0) %>%
  mutate(budget_double = budget * 2) %>%
  filter(revenue >= budget_double) %>%
  arrange(desc(vote_average))

# Approach 2: express the return as a fraction of the budget.
# revenue_percent is (revenue - budget) / budget, so a value of 1 (a 100%
# return) means revenue is exactly double the budget. A threshold of 2 would
# instead require revenue of at least three times the budget.
# Filtering out budget == 0 also avoids dividing by zero here.
movies %>%
  filter(budget > 0) %>%
  mutate(revenue_percent = (revenue - budget) / budget) %>%
  filter(revenue_percent >= 1) %>%
  arrange(desc(vote_average))

How many movies are represented from each language?

# Long form: group, count the rows in each group, then sort by that count.
movies_sel %>%
  group_by(original_language) %>%
  summarize(count = n()) %>%
  ungroup() %>%
  arrange(desc(count))
# Short form: count() does the grouping, counting, and sorting in one call.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
movies_sel %>%
  count(original_language, sort = TRUE, name = "count")

How much total budget was used across each original language?

# Sum the budgets within each original language, largest total first.
movies_sel %>%
  group_by(original_language) %>%
  summarize(budget_total = sum(budget)) %>%
  ungroup() %>%
  arrange(desc(budget_total))

How did vote averages change for English language films over time?

# Mean vote average per release year, English-language films only.
movies_english_over_time <- movies_sel %>%
  filter(original_language == "en") %>%
  group_by(release_year) %>%
  summarize(vote_average = mean(vote_average)) %>%
  ungroup()

# Scatter plot: one point per release year.
ggplot(
  data = movies_english_over_time,
  mapping = aes(x = release_year, y = vote_average)
) +
  geom_point()

How many movie titles match their original title? How many did not?

# count() can create the variable it counts by, so the flag is defined inline:
# TRUE when title and original_title are identical strings.
movies_sel %>%
  count(title_match_flag = title == original_title) %>%
  ggplot(aes(x = n, y = title_match_flag)) +
  geom_col()

# Same flag, broken down by release year.
movies_sel %>%
  count(release_year, title_match_flag = title == original_title)

What was the highest budget movie by release year?

# Inside a grouped filter(), max(budget) is evaluated per release year, so each
# year keeps its highest-budget row(s); ties within a year are all kept.
movies_sel %>%
  group_by(release_year) %>%
  filter(budget == max(budget)) %>%
  ungroup() %>%
  select(release_year, budget, title) %>%
  arrange(release_year)
library(tidyverse)
# Small self-contained sample of movies_sel for a reproducible example.
movies_sel_sample <- tibble(
  id = c(19995, 285, 206647, 49026, 49529, 559),
  title = c(
    "Avatar", "Pirates of the Caribbean: At World's End",
    "Spectre", "The Dark Knight Rises", "John Carter", "Spider-Man 3"
  ),
  original_language = c("en", "en", "en", "en", "en", "en"),
  vote_average = c(7.2, 6.9, 6.3, 7.6, 6.1, 5.9)
)

# Mean vote average per language, drawn as a horizontal bar chart.
# ggplot2 layers are added with `+`, not piped with `%>%`: piping would pass
# the plot object as the first argument of geom_col(), which expects an aes()
# mapping there and fails.
movies_sel_sample %>%
  group_by(original_language) %>%
  summarize(vote_average = mean(vote_average, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = vote_average,
             y = original_language)) +
  geom_col()